/*
 ============================================================================
 Name        : fastatotext.c
 Author      : M.Barsky
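 Description : Converts a FASTA file into several text files, one per sequence
 An example usage is shown in main, which takes the following arguments
 (folder arguments are concatenated directly with the names that follow them,
 so they must end with a path separator):
	<input folder> <input file name> <temp folder> <output file prefix> <output folder>
	<output file numeration start> <max number of sequences> <max line length>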
 ============================================================================
 */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size in bytes of the fixed buffers used for file paths and file names. */
#define MAX_PATH_LENGTH 200
/* Size in bytes of FileInfo.stringName, which holds a sequence's header name. */
#define MAX_SEQ_NAME_LENGTH 1000

/* Smaller of a and b; an argument may be evaluated twice. Not used in the visible code. */
#define MIN(a, b) ((a)<=(b) ? (a) : (b))

/*
 * Describes one sequence extracted from the FASTA input.
 * main writes an array of these records to disk with a single fwrite, so the
 * order and size of the fields define the binary layout of that file.
 */
typedef struct FileInfo
{
	/* path of the text file that received the sequence data */
	char fileName[MAX_PATH_LENGTH];
	/* number of sequence bytes written to that text file */
	int length;
	/* start of the FASTA header line (leading '>' included), up to the first space */
	char stringName[MAX_SEQ_NAME_LENGTH];
}FileInfo;

/*
 * NOTE(review): this type is not referenced anywhere in the visible code.
 * The field notes below are inferred from the names only - TODO confirm
 * against whichever module consumes it.
 */
typedef struct FileData
{
	/* presumably the path of a file */
	char FileName[MAX_PATH_LENGTH];
	/* presumably the size of that file */
	int fileSize;
	/* presumably how much of the file is to be indexed */
	int lengthToIndex;
	/* presumably an offset into a merged file */
	int startInMergedFile;
	/* presumably an offset into the original file */
	int startInOriginalFile;
}FileData;


/* Forward declaration; the definition appears further down in this file. */
int fastaToTextFiles(char *fastaFileName, FileInfo *files, 
		char *outputFilePrefix, int numerationStart,int *stringsCounter);

/* Number of FileInfo records main allocates; set in main from argv[7]. */
int max_number_of_sequences;
/* Size in bytes of the line buffers passed to fgets in fastaToTextFiles; set in main from argv[8]. */
int max_line;

// main writes three ints to the file named "%s%s_input_info" (argv[5], argv[4]).
// According to the original author, this is the info needed to continue with
// suffix tree construction:
//   first int  - number of sequences
//   second int - min sequence length
//   third int  - max sequence length

// Not read or written anywhere in the visible code; main reports its own counter instead.
int numberOfSequences=0;
// Shortest recorded sequence length; fastaToTextFiles treats 0 as "not set yet".
int minLength=0;
// Longest recorded sequence length; updated by fastaToTextFiles.
int maxLength=0;


/*
 * Copies the leading token of `from` into `to` and NUL-terminates it.
 *
 * The token ends at the first newline, carriage return, space or NUL byte.
 * Carriage return is a terminator so that lines with Windows (CRLF) endings
 * do not leak a stray '\r' into sequence names or sequence data.
 *
 * from - NUL-terminated source text
 * to   - destination; the caller must provide room for the token plus the
 *        terminating NUL, because no bounds checking is done here
 * size - receives the number of bytes copied, not counting the NUL
 */
void copyString(char *from, char *to, int *size)
{
	int i=0;

	while(from[i]!='\n' && from[i]!='\r' && from[i]!=' ' && from[i]!='\0')
	{
		to[i]=from[i];
		i++;
	}

	to[i]='\0';
	*size=i;
}

/*
 * Prints one line to stdout for each of the first numFiles records in files,
 * showing the record's index, sequence length, file name and sequence name.
 */
void printContent(int numFiles,FileInfo *files)
{
	int index=0;

	while(index<numFiles)
	{
		FileInfo *entry=&files[index];

		printf("File number %i has length %d has name %s and represents string %s\n",
				index,entry->length,entry->fileName,entry->stringName);
		index++;
	}
}

/*
 * Closes the text file of the sequence that was just written and appends its
 * FileInfo record to files, updating the global minLength / maxLength.
 *
 * outputFile is always closed by this call, even when it reports failure.
 * Returns 0 on success, 1 if the file could not be closed cleanly or the
 * files array is already full.
 */
static int finishSequence(FILE *outputFile, FileInfo *files, int *stringsCounter,
		const char *fileName, const char *stringName, int length)
{
	/* Defined at file scope; main allocates the files array with this many records. */
	extern int max_number_of_sequences;
	FileInfo *entry;

	if(fclose(outputFile)!=0)
	{
		printf("Error: could not finish writing txt file %s.\n", fileName);
		return 1;
	}

	if(*stringsCounter>=max_number_of_sequences)
	{
		printf("Error: input contains more than %d sequences\n", max_number_of_sequences);
		return 1;
	}

	entry=&files[*stringsCounter];
	/* snprintf truncates over-long names instead of overrunning the fixed fields */
	snprintf(entry->fileName, sizeof entry->fileName, "%s", fileName);
	snprintf(entry->stringName, sizeof entry->stringName, "%s", stringName);
	entry->length=length;

	if(length>maxLength)
		maxLength=length;
	/* minLength==0 doubles as "not set yet" */
	if(length<minLength || minLength==0)
		minLength=length;

	(*stringsCounter)++;
	return 0;
}

/*
 * Splits a FASTA file into one text file per sequence.
 *
 * Every line that starts with '>' begins a new sequence; its name is the
 * header text up to the first space (leading '>' included). The following
 * lines are written, without their line terminators, to
 * "<outputFilePrefix>_<n>.txt", where n starts at numerationStart and grows
 * by one per created file. Only the part of a sequence line before its first
 * space is written.
 *
 * fastaFileName    - path of the input FASTA file
 * files            - array that receives one FileInfo per written sequence
 * outputFilePrefix - path prefix of the generated text files
 * numerationStart  - number used for the first generated file
 * stringsCounter   - in/out: index of the next free slot in files; increased
 *                    by one for every sequence recorded
 *
 * Reads the global max_line (line buffer size) and updates the globals
 * minLength and maxLength for every recorded sequence, the last one included.
 *
 * Returns 0 on success and 1 on any error (an explanation is printed).
 */
int fastaToTextFiles(char *fastaFileName, FileInfo *files, 
		char *outputFilePrefix, int numerationStart, int *stringsCounter)
{
	int fileNameCounter=numerationStart;
	char currOutputName [MAX_PATH_LENGTH];

	char *line=NULL;
	char *tmp=NULL;
	char *key=NULL;

	FILE *fastaFile=NULL;
	FILE *outputFile=NULL;

	int totalStringLength=0;
	int atLineStart=1;		/* next chunk read by fgets begins a new input line */
	int skippingHeaderTail=0;	/* inside the remainder of an over-long header line */
	int result=1;

	/* fgets needs room for at least one character plus the terminating NUL */
	if(max_line<2)
	{
		printf("Error: max line length must be at least 2\n");
		return 1;
	}

	line=calloc((size_t)max_line, sizeof *line);
	tmp=calloc((size_t)max_line, sizeof *tmp);
	key=calloc((size_t)max_line, sizeof *key);
	if(line==NULL || tmp==NULL || key==NULL)
	{
		printf("Error: could not allocate line buffers\n");
		goto cleanup;
	}

	if(!(fastaFile= fopen ( fastaFileName , "rb" )))
	{
		printf("Could not open input fasta file \"%s\" \n", fastaFileName);
		goto cleanup;
	}

	currOutputName[0]='\0';

	while(fgets(line, max_line, fastaFile)!=NULL)
	{
		int lineLength=0;
		int startsLine=atLineStart;
		int endsLine=(strchr(line, '\n')!=NULL);

		atLineStart=endsLine;

		/* The rest of a header that did not fit into one buffer is not sequence data. */
		if(skippingHeaderTail)
		{
			skippingHeaderTail=!endsLine;
			continue;
		}

		if(startsLine && line[0]=='>')
		{
			if(outputFile!=NULL)
			{
				int failed=finishSequence(outputFile, files, stringsCounter,
						currOutputName, key, totalStringLength);
				outputFile=NULL;	/* closed by finishSequence in every case */
				if(failed)
					goto cleanup;
				totalStringLength=0;
			}
			copyString(line, key, &lineLength);
			skippingHeaderTail=!endsLine;
		}
		else
		{
			size_t writtenBytes;

			/* tmp is scratch space; only the computed length is needed here */
			copyString(line, tmp, &lineLength);
			totalStringLength+=lineLength;

			if(outputFile==NULL)
			{
				int nameLength=snprintf(currOutputName, sizeof currOutputName,
						"%s_%i.txt", outputFilePrefix, fileNameCounter++);
				if(nameLength<0 || (size_t)nameLength>=sizeof currOutputName)
				{
					printf("Error: output txt file name is too long\n");
					goto cleanup;
				}
				outputFile = fopen(currOutputName, "wb");
				if(outputFile==NULL) 
				{
					printf("Error: can't create output txt file %s.\n",currOutputName);
					goto cleanup;
				}
			}

			writtenBytes=fwrite(line, sizeof(char), (size_t)lineLength, outputFile);
			if(writtenBytes!=(size_t)lineLength)
			{
				printf("Error: not all txt file was written\n");
				goto cleanup;
			}
		}
	}

	if(ferror(fastaFile))
	{
		printf("Error: failed while reading input fasta file \"%s\" \n", fastaFileName);
		goto cleanup;
	}

	/* Record the last sequence, but only if one is actually open: the input
	   may be empty or may end with a header that has no sequence lines. */
	if(outputFile!=NULL)
	{
		int failed=finishSequence(outputFile, files, stringsCounter,
				currOutputName, key, totalStringLength);
		outputFile=NULL;
		if(failed)
			goto cleanup;
	}

	result=0;

cleanup:
	if(outputFile!=NULL)
		fclose(outputFile);
	if(fastaFile!=NULL)
		fclose(fastaFile);
	free(key);
	free(tmp);
	free(line);

	return result;
}

/*
 * Parses text as a base-10 integer that fits into an int.
 * Returns 0 and stores the value in *value on success; returns 1 when text
 * is empty, has trailing characters or is out of range.
 */
static int parseIntArg(const char *text, int *value)
{
	char *end;
	long parsed;

	errno=0;
	parsed=strtol(text, &end, 10);
	if(end==text || *end!='\0' || errno==ERANGE || parsed<INT_MIN || parsed>INT_MAX)
		return 1;

	*value=(int)parsed;
	return 0;
}

/*
 * Writes the concatenation head+tail+suffix into dest (destSize bytes).
 * Returns 0 on success, 1 if the result did not fit.
 */
static int joinNames(char *dest, size_t destSize,
		const char *head, const char *tail, const char *suffix)
{
	int needed=snprintf(dest, destSize, "%s%s%s", head, tail, suffix);

	return (needed<0 || (size_t)needed>=destSize);
}

/*
 * Command line entry point.
 *
 * Arguments: <inputfolder> <inputfilename> <tempfolder> <outputfileprefix>
 *            <outputfolder> <numerationstart> <maxnumberofsequences>
 *            <max line length>
 *
 * Folder arguments are concatenated directly with the following name, so
 * they must end with a path separator.
 *
 * Produces:
 *   <tempfolder><outputfileprefix>_<n>.txt      one text file per sequence
 *   <inputfolder><inputfilename>_files          the FileInfo records (binary)
 *   <outputfolder><outputfileprefix>_input_info three ints: number of
 *                                               sequences, min and max length
 *
 * Returns 0 on success and 1 on any error.
 */
int main(int argc, char *argv[])
{

	int numerationStart;
	int stringsCounter=0;
	FileInfo *files=NULL;

	char inputfilename [MAX_PATH_LENGTH];
	char outputfileprefix [MAX_PATH_LENGTH];
	char infofilename [MAX_PATH_LENGTH];
	char currOutputName [MAX_PATH_LENGTH];
	FILE *outputFile=NULL;

	size_t written;
	int closeStatus;
	int info[3];
	int exitCode=1;

	if(argc<9)
	{
		printf("To run: ./fastatotext <inputfolder> <inputfilename> <tempfolder> <outputfileprefix> <outputfolder>  "
				"<numerationstart> <maxnumberofsequences> <maxlinelengthlength>  \n");
		return 1;
	}

	if(joinNames(inputfilename, sizeof inputfilename, argv[1], argv[2], "")
			|| joinNames(outputfileprefix, sizeof outputfileprefix, argv[3], argv[4], "")
			|| joinNames(infofilename, sizeof infofilename, argv[5], argv[4], "_input_info")
			|| joinNames(currOutputName, sizeof currOutputName, inputfilename, "_files", ""))
	{
		printf("Error: a file path is longer than %d characters\n", MAX_PATH_LENGTH-1);
		return 1;
	}

	if(parseIntArg(argv[6], &numerationStart)
			|| parseIntArg(argv[7], &max_number_of_sequences)
			|| parseIntArg(argv[8], &max_line))
	{
		printf("Error: numeration start, max number of sequences and max line length must be integers\n");
		return 1;
	}

	/* max_line must leave room for one character plus the terminating NUL */
	if(max_number_of_sequences<1 || max_line<2)
	{
		printf("Error: max number of sequences must be at least 1 and max line length at least 2\n");
		return 1;
	}

	files=calloc((size_t)max_number_of_sequences, sizeof *files);
	if(files==NULL)
	{
		printf("Error: could not allocate memory for %d file infos\n", max_number_of_sequences);
		return 1;
	}

	if(fastaToTextFiles(inputfilename, files, 
			outputfileprefix, numerationStart, &stringsCounter))
		goto cleanup;

	//write file infos
	outputFile = fopen(currOutputName, "wb");
	if(outputFile==NULL) 
	{
		printf("Error: can't create output file for file info: %s.\n",currOutputName);
		goto cleanup;
	}
	written=fwrite(files, sizeof *files, (size_t)stringsCounter, outputFile);
	if(written!=(size_t)stringsCounter)
	{
		printf("Not all file info was written\n");
		goto cleanup;
	}
	closeStatus=fclose(outputFile);
	outputFile=NULL;	/* the stream is gone whether or not fclose succeeded */
	if(closeStatus!=0)
	{
		printf("Not all file info was written\n");
		goto cleanup;
	}

	printContent(stringsCounter,files);

	//write input_info
	info[0]=stringsCounter;
	info[1]=minLength;
	info[2]=maxLength;
	outputFile = fopen(infofilename, "wb");
	if(outputFile==NULL)
	{
		printf("Error: can't create output file for input info: %s.\n",infofilename);
		goto cleanup;
	}

	written=fwrite(info, sizeof info[0], 3, outputFile);
	if(written!=3)
	{
		printf("Not all input info was written\n");
		goto cleanup;
	}
	closeStatus=fclose(outputFile);
	outputFile=NULL;
	if(closeStatus!=0)
	{
		printf("Not all input info was written\n");
		goto cleanup;
	}

	printf("Summary of the input: number of files=%d min file length=%d max file length=%d\n",info[0],info[1],info[2]);

	exitCode=0;

cleanup:
	if(outputFile!=NULL)
		fclose(outputFile);
	free(files);

	return exitCode;

}
